library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.4.1.9000 ✔ dplyr 0.7.4
## ✔ tidyr 0.7.2 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.2.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(nycflights13)
# Show the flights table (made available by the nycflights13 package loaded
# above) so its columns can be inspected.
flights
# Number of rows in the full data set.
nrow(flights)
## [1] 336776
We only want to see flights from American Airlines (carrier code "AA") that departed with a delay:
# Keep only American Airlines ("AA") flights whose departure was late.
# Separate filter() arguments are combined with a logical AND.
american_airways_with_delay <- flights %>%
  filter(carrier == "AA", dep_delay > 0)

# Tabulate the remaining flights by year.
table(american_airways_with_delay[["year"]])
##
## 2013
## 10162
# Scatter plot of departure delay against scheduled hour, with a smoothed
# trend line on top; the y axis is log-scaled.
ggplot(
  data = american_airways_with_delay,
  mapping = aes(x = time_hour, y = dep_delay)
) +
  geom_point(color = "red", alpha = 0.1) +
  geom_smooth() +
  scale_y_log10(name = "Departure Delay in Minutes") +
  labs(x = "Time") +
  ggtitle("Delays over the year")
## `geom_smooth()` using method = 'gam'
# Per month: the share of delayed AA flights that were more than 10 minutes
# late, and the longest departure delay.
american_airways_with_delay %>%
  group_by(month) %>%
  summarise(
    # mean() of a logical vector is the proportion of TRUE values
    share_of_delayed_fligths = mean(dep_delay > 10),
    maximum_delay = max(dep_delay)
  )
# Per carrier: share of flights departing more than 10 minutes late, longest
# departure delay, and number of flights. Rows with a missing dep_delay are
# dropped first. Carriers are sorted from the highest share to the lowest.
aggregated <- flights %>%
  filter(!is.na(dep_delay)) %>%
  group_by(carrier) %>%
  summarise(
    # mean() of a logical vector is the proportion of TRUE values
    share_of_delayed_fligths = mean(dep_delay > 10),
    maximum_delay = max(dep_delay),
    number_of_flights = n()
  ) %>%
  arrange(desc(share_of_delayed_fligths))
# Column name stored as a string; the plot below does not reference it.
a <- "number_of_flights"

# Histogram of the per-carrier flight counts.
aggregated %>%
  ggplot(aes(x = number_of_flights)) +
  geom_histogram(bins = 10)
# table(flights$dep_delay, exclude = NULL)
# Apply the AA / positive-delay conditions to the already-filtered table.
filter(american_airways_with_delay, carrier == "AA", dep_delay > 0)
Es gibt verschiedene Typen von Daten in R:
# A numeric vector.
c(1, 2, 3)
## [1] 1 2 3
# Mixing a number and a string in c(): the number is converted to a string.
c(1, "a")
## [1] "1" "a"
# A list can hold other lists as elements.
list(1, list(2))
## [[1]]
## [1] 1
##
## [[2]]
## [[2]][[1]]
## [1] 2
# Built-in vector of the lowercase letters.
letters
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
# A data frame with ten rows; the single "" given for column c is reused for
# every row.
data.frame(a = 1:10, b = letters[1:10], c = "")
# table() counts how often each distinct value occurs.
table(c(1, 2, 1, 1, 2))
##
## 1 2
## 3 2
Conditions
# Equality test.
1 == 2
## [1] FALSE
# Strictly greater than.
1 > 2
## [1] FALSE
# Greater than or equal to.
1 >= 2
## [1] FALSE
# Strictly less than.
1 < 2
## [1] TRUE
#' Add one to a value.
#'
#' @param x Value to increment.
#' @return The result of `x + 1`.
add_one <- function(x) {
  incremented <- x + 1
  incremented
}
#' Add two values.
#'
#' @param x First summand.
#' @param y Second summand.
#' @return The result of `x + y`.
add_y <- function(x, y) x + y
# Try out the two helper functions.
add_y(x = 1, y = 3)
## [1] 4
add_one(x = 4)
## [1] 5
#' Draw a 10-bin histogram of one column.
#'
#' @param data Data frame passed to `ggplot()`.
#' @param variable_to_plot Name of the column to plot, given as a string.
#' @return The ggplot object.
create_plot <- function(data, variable_to_plot) {
  # aes_string() builds the mapping from a column name held in a string.
  # NOTE(review): newer ggplot2 releases deprecate aes_string(); revisit if
  # the project moves off the ggplot2 version attached here.
  x_mapping <- aes_string(x = variable_to_plot)
  ggplot(data, mapping = x_mapping) +
    geom_histogram(bins = 10)
}
# Histogram of the arr_time column of flights.
create_plot(data = flights, variable_to_plot = "arr_time")
## Warning: Removed 8713 rows containing non-finite values (stat_bin).
# List the entries of the current working directory and print them one at a
# time.
my_files <- list.files()
for (file in my_files) {
  # Print the current entry; printing my_files here would repeat the whole
  # vector on every iteration.
  print(file)
}
## [1] "flights_files"
## [1] "flights.nb.html"
## [1] "flights.Rmd"
## [1] "play-music.R"
## [1] "r-with-marc.Rproj"